Chapter 4 Data statistics

# Restore the objects saved in data/data.Rdata into the current environment.
# NOTE(review): presumably this is where `sample_preprocessing` and
# `read_counts`, used below, come from -- confirm against the saved file.
load("data/data.Rdata")

4.1 Sequencing reads statistics

# Summarise post-filtering sequencing depth in gigabases: the total over all
# samples, plus the per-sample mean and standard deviation shown as "mean ± sd".
sample_preprocessing %>%
  # Per-sample amount of data: reads * 150, scaled by 1e9.
  # NOTE(review): 150 is presumably the read length in bases -- confirm.
  mutate(gbases = reads_post_filt * 150 / 1e9) %>%
  summarise(
    Total = round(sum(gbases), 2),
    avg = round(mean(gbases), 2),
    sdev = round(sd(gbases), 2)
  ) %>%
  # Collapse the two rounded statistics into one display column; the source
  # columns are removed by unite()'s default.
  unite("Average", avg, sdev, sep = " ± ") %>%
  tt()
tinytable_42ta5h9ht7zq2ikhpxps
Total Average
712.18 7.83 ± 8.08

4.2 DNA fractions

# Build a per-sample table that splits the sequenced bases into four
# fractions: low quality, host, unmapped, and mapped to MAGs.
sequence_fractions <- read_counts %>%
  # Reshape to one row per genome/sample pair, then total the counts per sample.
  pivot_longer(cols = -genome, names_to = "sample", values_to = "value") %>%
  group_by(sample) %>%
  summarise(mags = sum(value)) %>%
  # Attach the per-sample preprocessing statistics.
  left_join(sample_preprocessing, by = "sample") %>%
  mutate(
    # NOTE(review): 150 is presumably the read length in bases -- confirm.
    mags_bases = mags * 150,
    # Bases lost between the pre- and post-filtering counts.
    lowqual_bases = bases_pre_filt - bases_post_filt,
    # Metagenomic bases not attributed to MAGs, floored at zero so the
    # subtraction can never yield a negative fraction.
    unmapped_bases = pmax(metagenomic_bases - mags_bases, 0)
  ) %>%
  select(sample, lowqual_bases, host_bases, unmapped_bases, mags_bases)

# Print the per-sample sequence fractions as a table, with every numeric
# column scaled by 1e9 (bases -> gigabases).
sequence_fractions %>%
  # across() replaces the superseded mutate_at(vars(...)) form; every column
  # except the sample identifier is divided by 1e9.
  mutate(across(-sample, ~ .x / 1e9)) %>%
  # Columns are renamed by position, so this depends on the column order
  # fixed when sequence_fractions was built.
  rename(
    "Sample" = 1,
    "Low quality" = 2,
    "Mapped to host" = 3,
    "Unmapped" = 4,
    "Mapped to MAGs" = 5
  ) %>%
  tt()
tinytable_mdkevdeimv61kkh0n76t
Sample Low quality Mapped to host Unmapped Mapped to MAGs
E01 1.9123757 1.7193909 0.50379365 1.83467325
E02 0.4353280 4.2158016 0.99338589 0.02671065
E03 1.3042313 1.7994078 6.42222236 0.01078875
E16 0.4224530 0.1025580 0.20197846 4.93816200
E18 4.7850204 6.9637290 24.05986617 0.52864665
E21 1.0156349 2.6368389 0.90981533 0.10217775
E24 3.7280695 1.1110971 23.81060939 2.34316305
E25 1.5376473 7.9804608 6.00871944 0.85221990
E27 0.4230030 0.0911577 1.95160083 0.44069160
E28 1.6079642 1.0844487 4.47911541 0.19568490
E30 1.7928992 1.4413299 4.00947792 5.12918235
E31 0.6816818 0.0320553 0.50570530 2.84763450
E34 2.1948529 0.8249310 11.67502744 2.54218275
E38 0.9838001 4.3969737 1.28190071 0.01802595
E43 0.4955380 2.8185273 0.82331661 0.00793485
E44 0.4234418 5.9270472 0.44183940 0.24959295
E45 1.5173952 0.8510214 1.95118882 0.84886650
E48 2.3197861 1.9469865 7.44564809 5.90044110
E56 0.5108719 0.0847401 1.09609707 1.82499015
E58 0.4880990 2.1264312 1.36506017 0.11759265
H04 2.5008506 27.4835352 13.14400663 0.03507165
H06 0.2841785 2.0398764 1.46660757 0.01427955
H07 1.4500404 10.9416210 3.39146197 0.01941135
H08 0.5011987 0.2131668 0.57356153 0.45570180
H09 1.2653958 10.8166410 7.00507483 0.15877860
H10 0.7942211 1.3782558 4.03545973 0.11882340
H12 2.5813911 8.4009948 4.51254046 0.84020835
H15 0.1917405 1.5658842 0.74185414 0.00826125
H16 2.1486462 4.0113864 9.56424671 3.69521370
H19 0.5557962 4.2411288 2.50314959 0.02286150
H20 0.9677842 4.3890999 3.78083553 0.07778835
H23 0.9420728 3.9595926 1.30102424 0.29551170
H25 1.0738812 7.7087100 3.42255644 0.68422815
H30 0.4902154 1.3423326 0.75352358 2.29694205
H31 1.8416387 9.7206468 4.43631049 0.48033270
H32 0.5427161 5.8415880 1.38975295 0.06290070
H34 1.3188004 12.4245048 5.12752821 0.02616405
H37 2.5737627 27.8754720 9.15837446 0.25084785
H39 0.2938515 2.4124689 0.75581958 0.02116935
H40 1.1315544 3.8592006 5.83728129 0.42791340
H43 0.7161526 6.3701718 2.50320009 0.03324300
H45 0.9215094 1.5407343 3.94772969 0.81685470
H47 0.9581476 5.6254728 2.15816743 0.32372385
P01 0.5733407 1.8086574 1.33450269 0.76149270
P05 1.1668645 9.3271338 0.63288528 0.36958890
P09 0.7574713 7.1642826 0.52521364 0.04058895
P14 0.5034011 3.8678286 0.03866504 0.64239540
P20 0.7666679 2.0253963 0.68358450 0.54081750
P25 0.2950900 4.5486294 0.50927300 0.38403045
P28 4.4616492 30.8235540 17.16905293 1.83789930
P33 3.3222731 33.2508960 7.86558092 0.43485525
P34 0.3459813 0.8618043 1.84351514 0.01439085
P36 1.2281449 10.5844332 0.28646419 0.71007735
P41 1.6479428 25.4247288 3.56446783 1.35787020
P43 3.4204299 3.2932512 7.78711290 13.30940625
P45 1.6790221 2.9649723 5.42286382 0.12008145
P47 0.4775963 2.8718187 1.06401988 0.80290395
P48 3.0192702 0.9945204 17.82268997 6.15753405
P49 0.8859881 15.7503030 0.32349316 0.16673670
P51 1.8101944 8.2580928 3.46629537 2.64862665
P53 1.5688483 24.5797020 2.14349384 0.12960855
P56 0.9749002 12.3956724 0.19719518 1.01352285
P58 0.7963638 6.4119048 1.11305236 0.07796910
P60 2.3930517 33.2359848 8.57518064 0.20491080
P64 0.2656280 0.3023889 1.17634560 0.47208750
P65 0.4114934 0.2113239 0.89754587 1.90757895
P67 1.7026024 17.3557032 0.62786285 2.02223145
P69 0.4073834 1.8827937 1.76632754 0.23484960
P72 0.2702016 3.9134910 0.57524887 0.20525790
P75 1.4392490 2.6274414 6.26632261 1.62440925
P78 0.9165986 2.0786076 1.22668730 0.48372150
P79 1.7376207 12.1993284 7.73575004 1.52807535
# Stacked bar chart of the four sequence fractions for every sample.
sequence_fractions %>%
  pivot_longer(cols = -sample, names_to = "fraction", values_to = "value") %>%
  mutate(
    # Scale the base counts by 1e9 for plotting.
    value = value / 1e9,
    # Fix the factor level order so the stacking order is explicit.
    fraction = factor(
      fraction,
      levels = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases")
    )
  ) %>%
  ggplot(aes(x = sample, y = value, fill = fraction)) +
  # geom_col() draws bars from the values as given and stacks them by default.
  geom_col() +
  scale_fill_manual(
    name = "Sequence type",
    breaks = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases"),
    labels = c("Low quality", "Mapped to host", "Unmapped", "Mapped to MAGs"),
    values = c("#CCCCCC", "#bcdee1", "#d8b8a3", "#93655c")
  ) +
  labs(x = "Samples", y = "Amount of data (GB)") +
  theme_classic() +
  theme(
    # Rotate and shrink the sample labels so they do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
    legend.position = "bottom"
  )

tinytable_wagg0i30bdu3sucwednl
species mean_host_perc sd_host_perc max_host_perc min_host_perc
Eb 24.0105851 26.0487087 77.523636 0.39599794
Ha 36.4671066 15.3892853 58.926064 6.53771139
Pk 56.5200310 29.2493571 92.136483 1.81265533
NA 0.5625856 0.4379119 1.079399 0.02651141